# -*- coding: utf-8 -*-
import scrapy
from cnblog.items import CnblogItem


class CnblogSpiderSpider(scrapy.Spider):
    """Spider that walks a paginated listing on stjj.guizhou.gov.cn.

    The crawl starts at ``url`` (``.../index.html``). After each page is
    parsed, ``offset`` is incremented and the next page URL is built by
    inserting ``_<offset>`` in front of the trailing ``.html`` of ``url``
    (e.g. ``.../index_2.html``). Pages are requested one after another
    until ``offset`` reaches ``max_offset``.
    """

    name = "cnblog_spider"
    allowed_domains = ["stjj.guizhou.gov.cn"]
    url = 'https://stjj.guizhou.gov.cn/tjsj_35719/zxsj_35810/index.html'
    # Page counter; 1 corresponds to the start URL itself.
    offset = 1
    # Highest value ``offset`` may reach, i.e. the last ``index_<n>.html``
    # page that will be requested.
    max_offset = 80
    start_urls = [url]

    def parse(self, response):
        """Extract the entries of one listing page and schedule the next page.

        Yields a single ``CnblogItem`` whose ``title`` and ``link`` fields
        are lists holding the text and ``href`` of every matching anchor on
        the page, followed by a ``scrapy.Request`` for the next page while
        ``offset`` is still below ``max_offset``.
        """
        item = CnblogItem()

        # Both fields are read from the same anchors, located with XPath.
        anchor_xpath = '//span[@class="lbx"]/a[@target="_blank"]'
        item['title'] = response.xpath(anchor_xpath + '/text()').extract()
        item['link'] = response.xpath(anchor_xpath + '/@href').extract()

        yield item

        print("第{0}页爬取完成".format(self.offset))

        # Stop once the page limit is reached. Without this guard the same
        # last-page URL would be rebuilt and requested again, and the crawl
        # would only terminate thanks to Scrapy's duplicate-request filter.
        if self.offset >= self.max_offset:
            return

        self.offset += 1
        # NOTE(review): the first follow-up page requested is index_2.html;
        # index_1.html is never fetched - confirm this matches the site's
        # page numbering.
        # Build the next URL by inserting "_<offset>" before the 5-character
        # ".html" suffix of the base URL.
        stem, suffix = self.url[:-5], self.url[-5:]
        url2 = '{0}_{1}{2}'.format(stem, self.offset, suffix)
        print(url2)
        yield scrapy.Request(url=url2, callback=self.parse)

